# weights
weight_IPW <- function(treatment, G, W_sum, prob =  0.5){
  sum_t <- G*W_sum
  prob_use0 <- choose(W_sum, sum_t)*(prob^sum_t)*((1-prob)^(W_sum - sum_t))
  treat_prob <- rep(prob, length(treatment))
  treat_prob[treatment == 0] <- 1  -  prob
  prob_use <- prob_use0*treat_prob
  weights  <- 1/prob_use
  weights  <- weights/mean(weights)
  return(weights)
}

# ANSE estimator
ANSE <- function(formula, d = 0, exp = c(0.8, 0.2),
                 data, weights, prob, clusters, type = "model", pol = 1){
  
  Formula_in <- Formula(formula)
  formula_in <- formula(terms(Formula_in))
  
  if(type ==  "model"){
    if(missing(weights)) stop("Please specify 'weights' ")
  }
  
  if(type ==  "design"){
    if(missing(prob)) stop("Please specify 'prob' ")
  }
  
  ## convert formula
  f1 <- formula(Formula_in, rhs = 1)
  f_net <- formula(Formula_in, rhs = 2)
  
  ## extract variable names
  outcome_name    <- all.vars(f1)[1]
  treat_name  <- all.vars(f1)[2]
  exp_name <-  all.vars(f_net)[2]
  
  if(type == "design"){  # Hajek
    Y <- data[, outcome_name]  
    treat_ind <- data[, treat_name]
    exp_high_ind <- as.numeric(treat_ind == d)*as.numeric(data[, exp_name] == exp[1])
    exp_low_ind <- as.numeric(treat_ind == d)*as.numeric(data[, exp_name] == exp[2])
    weights <- 1/prob
    
    # estimate
    m_h <- sum((Y*weights)*exp_high_ind)/sum(weights[exp_high_ind == 1])
    m_l <- sum((Y*weights)*exp_low_ind)/sum(weights[exp_low_ind == 1])
    est <- m_h - m_l
    
    # Y_res
    Y_res <- Y 
    Y_res[exp_high_ind == 1] <- Y_res[exp_high_ind == 1] - m_h
    Y_res[exp_low_ind == 1] <- Y_res[exp_low_ind == 1] - m_l
    
    V_h <- sum(((Y_res/prob)^2)*(1-prob)*exp_high_ind)/(length(Y_res)^2)
    V_l <- sum(((Y_res/prob)^2)*(1-prob)*exp_low_ind)/(length(Y_res)^2)
    cov_hl <- - sum((Y_res^2)/(2*prob))/(length(Y_res)^2)
    se_HT <- sqrt(max(V_h + V_l - 2*cov_hl, 0.0001))
    out <- c(est, se_HT, est - 1.96*se_HT, est + 1.96*se_HT)
    names(out) <- c("Estimate", "Std. Error", "low.95ci", "high.95ci")
    lm_main  <- NULL
    design_output <- c(m_h, m_l, V_h, V_l, cov_hl)
    
  }else if(type == "model"){
    # linear model 
    if(length(Formula_in)[2] == 3){
      # with covariates
      f2 <- formula(Formula_in, lhs = 0, rhs = 3)
      x_for <- paste(all.vars(f2), collapse = "+")
      if(pol == 1){
        formula_use <- as.formula(paste(outcome_name, "~ treat_internal*",
                                        exp_name, "+", x_for,  sep =  ""))
      }else if(pol  == 2){
        formula_use <- as.formula(paste(outcome_name, 
                                        "~ treat_internal*", exp_name, 
                                        "~ treat_internal*I(", exp_name,"^2)",
                                        "+", x_for, sep =  ""))
      }
    }else{
      # without covariates
      if(pol == 1){
        formula_use <- as.formula(paste(outcome_name, "~ treat_internal*",
                                        exp_name, sep =  ""))
      }else if(pol  == 2){
        formula_use <- as.formula(paste(outcome_name, 
                                        "~ treat_internal*", exp_name, 
                                        "~ treat_internal*I(", exp_name,"^2)",
                                        sep =  ""))
      }
    }
    # internal coding 
    if(d == 1){
      data$treat_internal <- 1- data[ , treat_name]
    }else if(d == 0){
      data$treat_internal <- data[ , treat_name]
    }
    weights[weights > 10] <- 10 # trim weights
    # Estimate the model
    if(!missing(clusters)){
      lm_main <- lm_robust(formula_use, data = data, weights = weights,  clusters = clusters)
    }else{
      lm_main <- lm_robust(formula_use, data = data, weights = weights)
    }
    if(pol == 1){
      out_b <- (exp[1] - exp[2])*summary(lm_main)$coef[exp_name, c(1, 2)]
    }else if(pol == 2){
      exp_name_sq <- paste("I(", exp_name,"^2)", sep="")
      est_1 <- (exp[1] - exp[2])*summary(lm_main)$coef[exp_name, 1]
      est_2 <- (exp[1]^2 - exp[2]^2)*summary(lm_main)$coef[exp_name_sq, 1]
      est_all <- est_1 + est_2 
      vcov_base <- vcov(lm_main)[c(exp_name, exp_name_sq), c(exp_name, exp_name_sq)]
      var_all <- (exp[1] - exp[2])^2*vcov_base[1,1] + (exp[1]^2 - exp[2]^2)^2*vcov_base[2,2] +
        2*(exp[1] - exp[2])*(exp[1]^2 - exp[2]^2)*vcov_base[1,2]
      out_b <- c(est_all, sqrt(var_all))
    }
    
    out <- c(out_b[1], out_b[2], out_b[1] - 1.96*out_b[2], out_b[1] + 1.96*out_b[2])
    names(out) <- c("Estimate", "Std. Error", "low.95ci", "high.95ci")
    design_output <- NULL
  }
  
  output <- list("out" = out, "lm" = lm_main,  "data_internal" = data, 
                 "design_output"  =  design_output, "type" = type)
  return(output)
}

# Used for Nonparametric sensitivity analysis
nonpara_ANSE <- function(out_ANSE, exp_name, 
                         exp, MR_UY = 1.3, RR_GU = 1.3, sim_size = 1000, seed = 1234){
  type <- out_ANSE$type
  set.seed(seed)
  
  if(type == "model"){
    # input
    lm_main  <- out_ANSE$lm
    data_internal  <- out_ANSE$data_internal
    
    newdata_high <- newdata_low  <- data_internal
    newdata_high[, "treat_internal"] <- newdata_low[, "treat_internal"] <-  0 
    newdata_high[, exp_name] <- exp[1]
    newdata_low[, exp_name] <- exp[2]
    
    df_high <- model.frame(formula(lm_main), data = newdata_high)
    x_high <- model.matrix(formula(lm_main), data = newdata_high)
    df_low <- model.frame(formula(lm_main), data = newdata_low)
    x_low <- model.matrix(formula(lm_main), data = newdata_low)
    
    # simulate 
    coef_mat <- mvrnorm(n= sim_size, mu = coef(lm_main), Sigma = lm_main$vcov)
    m_high <- apply(coef_mat %*% t(x_high), 1, mean)
    m_low <- apply(coef_mat %*% t(x_low), 1, mean)
    
    B  <- (MR_UY*RR_GU)/(MR_UY + RR_GU - 1)
    
    lower <- m_high/B  - m_low*B
    upper <- m_high*B  - m_low/B
    
    output <- list("lower_mean" = mean(lower), "lower_se" = sd(lower), "lower" = lower,
                   "upper_mean" = mean(upper), "upper_se" = sd(upper), "upper" = upper)
  }else if(type == "design"){
    design_output <- out_ANSE$design_output # c(m_h, m_l, V_h, V_l, cov_hl)
    B  <- (MR_UY*RR_GU)/(MR_UY + RR_GU - 1)
    
    lower_mean <-  design_output[1]/B - design_output[2]*B
    upper_mean <-  design_output[1]*B - design_output[2]/B
    lower_se   <-  sqrt(max(design_output[3]/(B^2) + design_output[4]*(B^2) -2*design_output[5],
                            0.0001))
    upper_se   <-  sqrt(max(design_output[3]*(B^2) + design_output[4]/(B^2) -2*design_output[5],
                            0.0001))
    output <- list("lower_mean" = lower_mean, "lower_se" = lower_se,
                   "upper_mean" = upper_mean, "upper_se" = upper_se)
    
  }
  return(output)
}
